In [1]:
# import os
# import subprocess
# from pathlib import Path
# # Set up the directories
# base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP")
# files_dir = base_dir / "files"
# output_dir = base_dir / "output_bw"
# output_dir.mkdir(exist_ok=True)
# # Function to run bamCoverage
# def run_bamCoverage(bam_input, bw_output, num_cores=32):
# cmd = [
# "bamCoverage",
# "--bam", str(bam_input),
# "--outFileName", str(bw_output),
# "--outFileFormat", "bigwig",
# "--binSize", "1", # per nucleotide resolution
# "--normalizeUsing", "RPKM", # Normalization method; can be changed as needed
# "--numberOfProcessors", str(num_cores) # Use 32 cores
# ]
# subprocess.run(cmd, check=True)
# # Get all BAM files in the files directory
# bam_files = list(files_dir.glob("*.bam"))
# # Generate BigWig files for each BAM file
# for bam_file in bam_files:
# output_bw = output_dir / f"{bam_file.stem}.bw"
# run_bamCoverage(bam_file, output_bw)
# print(f"Generated BigWig file: {output_bw}")
# print("All BAM files have been converted to BigWig format.")
In [2]:
# import os
# import subprocess
# from pathlib import Path
# # Set up the directories
# base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP")
# files_dir = base_dir / "files"
# output_dir = base_dir / "output_bw"
# output_dir.mkdir(exist_ok=True)
# # Function to run bamCoverage for positively stranded data
# def run_bamCoverage(bam_input, bw_output, num_cores=32):
# cmd = [
# "bamCoverage",
# "--bam", str(bam_input),
# "--outFileName", str(bw_output),
# "--outFileFormat", "bigwig",
# "--binSize", "1", # per nucleotide resolution
# "--normalizeUsing", "RPKM", # Normalization method; can be changed as needed
# "--numberOfProcessors", str(num_cores), # Use 32 cores
# ]
# subprocess.run(cmd, check=True)
# # Get all BAM files in the files directory
# bam_files = [
# files_dir / "BM_KO.bam",
# files_dir / "BM_WT.bam"
# ]
# # Generate positively stranded BigWig files for each BAM file
# for bam_file in bam_files:
# output_bw = output_dir / f"{bam_file.stem}.bw"
# run_bamCoverage(bam_file, output_bw)
# print(f"Generated BigWig file: {output_bw}")
# print("All BAM files have been converted to BigWig format.")
In [3]:
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 5
title = genes (gtf) with arrow
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color_arrow = red
[spacer]
height = 1
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = 1
title = Chromosomal Location
"""
def generate_track_section(bw_file, title, color):
return f"""
[{title}]
file = {bw_file}
height = 5
title = {title}
file_type = bigwig
color = {color}
"""
# Track info mapping file names to desired colors
track_info = {
"Input1_MLTC-1_4.bw": "blue",
"Input2_MLTC-1_5.bw": "blue",
"Input3_MLTC-1_6.bw": "blue",
"IP1_MLTC-1_4.bw": "red",
"IP2_MLTC-1_5.bw": "red",
"IP3_MLTC-1_6.bw": "red",
}
# Generate the .ini content
ini_content = header
for bw_file, color in track_info.items():
title = f"{bw_file} coverage"
ini_content += generate_track_section(f"ECLIPSE_BIO_SFTP/output_bw/{bw_file}", title, color)
ini_content += footer
# Write the configuration content to a file
with open("bed_and_gtf_tracks_with_peaks.ini", "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [4]:
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the Pgbd5 gene based on the given bed file
region = "chr8:124368294-124440567"
# Generate the pyGenomeTracks plot
subprocess.run([
"pyGenomeTracks",
"--tracks", "bed_and_gtf_tracks_with_peaks.ini",
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "130",
"-o", "output_Pgbd5_colored_peaks.png"
])
print("Plot generated successfully.")
INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [genes arrow] 100%|██████████| 19/19 [00:00<00:00, 7586.80it/s] INFO:pygenometracks.tracksClass:initialize 4. [spacer] INFO:pygenometracks.tracksClass:initialize 5. [Input1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:initialize 6. [Input2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 7. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 8. [IP1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 10. [IP3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 11. [x-axis] INFO:pygenometracks.tracksClass:initialize 12. [spacer] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.5095622539520264 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 43.08510638297873. Dpi is set to 130 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 12.58,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (12.58, -0.08) INFO:pygenometracks.tracksClass:plotting 4. [spacer] INFO:pygenometracks.tracksClass:plotting 5. [Input1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:plotting 6. [Input2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 7. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 8. [IP1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 10. [IP3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 11. [x-axis] INFO:pygenometracks.tracksClass:plotting 12. [spacer]
Plot generated successfully.
In [5]:
from IPython.display import Image
# Display the generated plot
Image(filename='output_Pgbd5_colored_peaks.png')
Out[5]:
In [6]:
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the Pgbd5 gene based on the given bed file
region = "chr8:124368925-124372074"
# Generate the pyGenomeTracks plot
subprocess.run([
"pyGenomeTracks",
"--tracks", "bed_and_gtf_tracks_with_peaks.ini",
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "130",
"-o", "output_Pgbd5_3primeUTR_colored_peaks.png"
])
INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [genes arrow] 100%|██████████| 14/14 [00:00<00:00, 7768.26it/s] INFO:pygenometracks.tracksClass:initialize 4. [spacer] INFO:pygenometracks.tracksClass:initialize 5. [Input1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:initialize 6. [Input2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 7. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 8. [IP1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 10. [IP3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 11. [x-axis] INFO:pygenometracks.tracksClass:initialize 12. [spacer] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.4960031509399414 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 43.08510638297873. Dpi is set to 130 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 12.58,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (12.58, -0.08) INFO:pygenometracks.tracksClass:plotting 4. [spacer] INFO:pygenometracks.tracksClass:plotting 5. [Input1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:plotting 6. [Input2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 7. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 8. [IP1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 10. [IP3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 11. [x-axis] INFO:pygenometracks.tracksClass:plotting 12. [spacer]
Out[6]:
CompletedProcess(args=['pyGenomeTracks', '--tracks', 'bed_and_gtf_tracks_with_peaks.ini', '--region', 'chr8:124368925-124372074', '--trackLabelFraction', '0.2', '--width', '38', '--dpi', '130', '-o', 'output_Pgbd5_3primeUTR_colored_peaks.png'], returncode=0)
In [7]:
from IPython.display import Image
# Display the generated plot
Image(filename='output_Pgbd5_3primeUTR_colored_peaks.png')
Out[7]:
In [8]:
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the Pgbd5 gene based on the given bed file
region = "chr1:34305572-34309512"
# Generate the pyGenomeTracks plot
subprocess.run([
"pyGenomeTracks",
"--tracks", "bed_and_gtf_tracks_with_peaks.ini",
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "130",
"-o", "output_Dst_3primeUTR_colored_peaks.png"
])
INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [genes arrow] 100%|██████████| 14/14 [00:00<00:00, 2837.69it/s] INFO:pygenometracks.tracksClass:initialize 4. [spacer] INFO:pygenometracks.tracksClass:initialize 5. [Input1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:initialize 6. [Input2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 7. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 8. [IP1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 10. [IP3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 11. [x-axis] INFO:pygenometracks.tracksClass:initialize 12. [spacer] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.5191502571105957 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 43.08510638297873. Dpi is set to 130 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 14.879999999999999,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (14.879999999999999, -0.08) INFO:pygenometracks.tracksClass:plotting 4. [spacer] INFO:pygenometracks.tracksClass:plotting 5. [Input1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:plotting 6. [Input2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 7. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 8. [IP1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 10. [IP3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 11. [x-axis] INFO:pygenometracks.tracksClass:plotting 12. [spacer]
Out[8]:
CompletedProcess(args=['pyGenomeTracks', '--tracks', 'bed_and_gtf_tracks_with_peaks.ini', '--region', 'chr1:34305572-34309512', '--trackLabelFraction', '0.2', '--width', '38', '--dpi', '130', '-o', 'output_Dst_3primeUTR_colored_peaks.png'], returncode=0)
In [9]:
from IPython.display import Image
# Display the generated plot
Image(filename='output_Dst_3primeUTR_colored_peaks.png')
Out[9]:
NOW I WILL ATTEMPT TO MAKE THE FIGURE FOR THE PUBLICATION FOR THE GENE Pgbd5
In [10]:
import csv
# Input and output file paths
input_csv_path = "/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP/files/peak_table.csv"
output_bed_path = "/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP/files/ZPF36L2.reproducible_eclip_peaks_converted.bed"
# Reading CSV and writing out BED6
with open(input_csv_path, newline='') as csvfile:
reader = csv.DictReader(csvfile)
with open(output_bed_path, 'w') as bedfile:
for row in reader:
chromosome = row["Chromosome"].replace(",", "")
start = row["Start"].replace(",", "")
end = row["End"].replace(",", "")
strand = row["Strand"]
name = f'{chromosome}:{start}-{end} ({strand})'
# chrom, start, end, name, score, strand
bedfile.write(f'{chromosome}\t{start}\t{end}\t{name}\t0\t{strand}\n')
print("CSV file has been converted to BED format successfully.")
CSV file has been converted to BED format successfully.
In [11]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
bm_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "BM_bw_files"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 5
title = genes (gtf) with arrow
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = red
[spacer]
height = 1
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = 1
title = Chromosomal Location
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Track info for BM_bw_files
bm_files = sorted([f for f in bm_bw_dir.glob("*.bw")])
# Reorder the files as specified: WT1, KO1, WT2, KO2, etc.
ordered_bm_files = []
for i in range(1, 6):
ordered_bm_files.append(bm_bw_dir / f"WT{i}-BMcells.bw")
ordered_bm_files.append(bm_bw_dir / f"KO{i}-BMcells.bw")
# Track info for output_bw files
output_bw_files = sorted([f for f in output_bw_dir.glob("*.bw")])
# Generate the .ini content
ini_content = header
# Add BM_bw_files coverage tracks with specified min and max values and colors
for bw_file in ordered_bm_files:
title = f"{bw_file.name} coverage"
color = "blue" if "WT" in bw_file.name else "orange"
ini_content += generate_track_section(bw_file, title, 'bigwig', min_value=0, max_value=0.25, color=color)
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks_converted.bed"
# Create the section for the new BED file without labels
ini_content += generate_track_section(
converted_peaks_file, 'Converted reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", strand=True, labels=False, color="black"
)
# Add output_bw coverage tracks with specific max values
input_max_value = 25000 # Specific maximum value for input bam coverage tracks
ip_max_value = 25000 # Specific maximum value for IP bam coverage tracks
bw_colors = {
"Input": "blue",
"IP": "red"
}
for bw_file in output_bw_files:
color = "black" # default color if no match found
max_value = None
for key, value in bw_colors.items():
if key in bw_file.name:
color = value
if key == "Input":
max_value = input_max_value
elif key == "IP":
max_value = ip_max_value
break
title = f"{bw_file.name} coverage"
ini_content += generate_track_section(bw_file, title, 'bigwig', color=color, max_value=max_value)
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Pgbd5.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [12]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the Pgbd5 gene based on the given bed file
region = "chr8:124368294-124440567"
# Generate the pyGenomeTracks plot
subprocess.run([
"pyGenomeTracks",
"--tracks", config_file_path,
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "130",
"-o", "output_Pgbd5_colored_peaks.png"
])
print("Plot generated successfully.")
WARNING:pygenometracks.tracksClass:In section 15. [Converted reproducible eCLIP peaks], these parameters are unused:['strand']. INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [genes arrow] 100%|██████████| 19/19 [00:00<00:00, 8076.60it/s] INFO:pygenometracks.tracksClass:initialize 4. [spacer] INFO:pygenometracks.tracksClass:initialize 5. [WT1-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 6. [KO1-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 7. [WT2-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 8. [KO2-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [WT3-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 10. [KO3-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 11. [WT4-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 12. [KO4-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 13. [WT5-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 14. [KO5-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 15. [Converted reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 15. [Converted reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 47/47 [00:00<00:00, 63223.95it/s] INFO:pygenometracks.tracksClass:initialize 16. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 17. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 18. [IP1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:initialize 19. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 20. [IP3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 21. [Input1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:initialize 22. [Input2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 23. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 24. [x-axis] INFO:pygenometracks.tracksClass:initialize 25. [spacer] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.5117621421813965 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 71.80851063829788. Dpi is set to 130 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 12.58,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (12.58, -0.08) INFO:pygenometracks.tracksClass:plotting 4. [spacer] INFO:pygenometracks.tracksClass:plotting 5. [WT1-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 6. [KO1-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 7. [WT2-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 8. [KO2-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [WT3-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 10. [KO3-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 11. [WT4-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 12. [KO4-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 13. [WT5-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 14. [KO5-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 15. [Converted reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 16. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 17. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 18. [IP1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:plotting 19. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 20. [IP3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 21. [Input1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:plotting 22. [Input2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 23. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 24. [x-axis] INFO:pygenometracks.tracksClass:plotting 25. [spacer]
Plot generated successfully.
In [13]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Pgbd5 3\' UTR Region'
# Load the image
img = mpimg.imread('output_Pgbd5_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(38, 38))
plt.imshow(img)
plt.title(custom_title, fontsize=20)
plt.axis('off') # Hide axes
plt.show()
In [14]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
bm_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "BM_bw_files"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 5
title = genes (gtf) with arrow
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = red
[spacer]
height = 1
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = 1
title = Chromosomal Location
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Track info for BM_bw_files
bm_files = sorted([f for f in bm_bw_dir.glob("*.bw")])
# Reorder the files as specified: WT1, KO1, WT2, KO2, etc.
ordered_bm_files = []
for i in range(1, 6):
ordered_bm_files.append(bm_bw_dir / f"WT{i}-BMcells.bw")
ordered_bm_files.append(bm_bw_dir / f"KO{i}-BMcells.bw")
# Track info for output_bw files
output_bw_files = sorted([f for f in output_bw_dir.glob("*.bw")])
# Generate the .ini content
ini_content = header
# Add BM_bw_files coverage tracks with specified min and max values and colors
for bw_file in ordered_bm_files:
title = f"{bw_file.name} coverage"
color = "blue" if "WT" in bw_file.name else "orange"
ini_content += generate_track_section(bw_file, title, 'bigwig', min_value=0, max_value=0.4, color=color)
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks_converted.bed"
# Create the section for the new BED file without labels
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", strand=True, labels=False, color="black"
)
# Add output_bw coverage tracks with specific max values
input_max_value = 350000 # Specific maximum value for input bam coverage tracks
ip_max_value = 350000 # Specific maximum value for IP bam coverage tracks
bw_colors = {
"Input": "blue",
"IP": "red"
}
for bw_file in output_bw_files:
color = "black" # default color if no match found
max_value = None
for key, value in bw_colors.items():
if key in bw_file.name:
color = value
if key == "Input":
max_value = input_max_value
elif key == "IP":
max_value = ip_max_value
break
title = f"{bw_file.name} coverage"
ini_content += generate_track_section(bw_file, title, 'bigwig', color=color, max_value=max_value, min_value=0)
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Cyp11a1.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [15]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the Pgbd5 gene based on the given bed file
region = "chr9:58020666-58027128"
# Generate the pyGenomeTracks plot
subprocess.run([
"pyGenomeTracks",
"--tracks", config_file_path,
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "130",
"-o", "output_Cyp11a1_colored_peaks.png"
])
print("Plot generated successfully.")
WARNING:pygenometracks.tracksClass:In section 15. [Reproducible eCLIP peaks], these parameters are unused:['strand']. INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [genes arrow] 100%|██████████| 27/27 [00:00<00:00, 7460.22it/s] INFO:pygenometracks.tracksClass:initialize 4. [spacer] INFO:pygenometracks.tracksClass:initialize 5. [WT1-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 6. [KO1-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 7. [WT2-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 8. [KO2-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [WT3-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 10. [KO3-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 11. [WT4-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 12. [KO4-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 13. [WT5-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 14. [KO5-BMcells.bw coverage] INFO:pygenometracks.tracksClass:initialize 15. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 15. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 30/30 [00:00<00:00, 69827.48it/s] INFO:pygenometracks.tracksClass:initialize 16. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 17. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 18. [IP1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:initialize 19. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 20. [IP3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 21. [Input1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:initialize 22. [Input2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 23. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 24. [x-axis] INFO:pygenometracks.tracksClass:initialize 25. [spacer] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.5134139060974121 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 71.80851063829788. Dpi is set to 130 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 7.9799999999999995,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (7.9799999999999995, -0.08) INFO:pygenometracks.tracksClass:plotting 4. [spacer] INFO:pygenometracks.tracksClass:plotting 5. [WT1-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 6. [KO1-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 7. [WT2-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 8. [KO2-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [WT3-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 10. [KO3-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 11. [WT4-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 12. [KO4-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 13. [WT5-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 14. [KO5-BMcells.bw coverage] INFO:pygenometracks.tracksClass:plotting 15. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 16. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 17. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 18. [IP1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:plotting 19. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 20. [IP3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 21. [Input1_MLTC-1_4.bw coverage] INFO:pygenometracks.tracksClass:plotting 22. [Input2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 23. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 24. [x-axis] INFO:pygenometracks.tracksClass:plotting 25. [spacer]
Plot generated successfully.
In [16]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Cyp11a1 3\' UTR Region'
# Load the image
img = mpimg.imread('output_Cyp11a1_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(38, 38))
plt.imshow(img)
plt.title(custom_title, fontsize=20)
plt.axis('off') # Hide axes
plt.show()
Final Versions:
In [17]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 5
title = genes (gtf) with arrow
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
[spacer]
height = 1
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = 1
title = Chromosomal Location
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=316, color=color)
# Add the converted peaks track above the MLTC files
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks_converted.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", strand=True, labels=False, color="black"
)
# Add output_bw coverage tracks with specific max values
input_max_value = 350000 # Specific maximum value for input bam coverage tracks
ip_max_value = 350000 # Specific maximum value for IP bam coverage tracks
for bw_file in ["Input3_MLTC-1_6.bw", "IP2_MLTC-1_5.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
max_value = input_max_value if "Input" in bw_file else ip_max_value
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, max_value=max_value, min_value=0)
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Cyp11a1.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [18]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene based on the given bed file
region = "chr9:58020666-58027128"
# Generate the pyGenomeTracks plot
subprocess.run([
"pyGenomeTracks",
"--tracks", config_file_path,
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "130",
"-o", "output_Cyp11a1_colored_peaks.png"
])
print("Plot generated successfully.")
WARNING:pygenometracks.tracksClass:In section 7. [Reproducible eCLIP peaks], these parameters are unused:['strand']. INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [genes arrow] 100%|██████████| 27/27 [00:00<00:00, 7343.16it/s] INFO:pygenometracks.tracksClass:initialize 4. [spacer] INFO:pygenometracks.tracksClass:initialize 5. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 6. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 30/30 [00:00<00:00, 128791.32it/s] INFO:pygenometracks.tracksClass:initialize 8. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 10. [x-axis] INFO:pygenometracks.tracksClass:initialize 11. [spacer] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.5065457820892334 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 27.127659574468087. Dpi is set to 130 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 7.9799999999999995,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (7.9799999999999995, -0.08) INFO:pygenometracks.tracksClass:plotting 4. [spacer] INFO:pygenometracks.tracksClass:plotting 5. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 6. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 10. [x-axis] INFO:pygenometracks.tracksClass:plotting 11. [spacer]
Plot generated successfully.
In [19]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Cyp11a1 3\' UTR Region'
# Load the image
img = mpimg.imread('output_Cyp11a1_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
In [20]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 5
title = genes (gtf) with arrow
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
[spacer]
height = 1
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = 1
title = Chromosomal Location
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=155, color=color)
# Add the converted peaks track above the MLTC files
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", labels=False, color="black"
)
# Add output_bw coverage tracks with specific max values
input_max_value = 55000 # Specific maximum value for input bam coverage tracks
ip_max_value = 55000 # Specific maximum value for IP bam coverage tracks
for bw_file in ["Input3_MLTC-1_6.bw", "IP2_MLTC-1_5.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
max_value = input_max_value if "Input" in bw_file else ip_max_value
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, min_value=0, max_value=max_value)
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Pgbd5.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [21]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene based on the given bed file
region = "chr8:124368294-124440567"
# Generate the pyGenomeTracks plot
subprocess.run([
"pyGenomeTracks",
"--tracks", config_file_path,
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "130",
"-o", "output_Pgbd5_colored_peaks.png"
])
print("Plot generated successfully.")
INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [genes arrow] 100%|██████████| 19/19 [00:00<00:00, 7957.24it/s] INFO:pygenometracks.tracksClass:initialize 4. [spacer] INFO:pygenometracks.tracksClass:initialize 5. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 6. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 47/47 [00:00<00:00, 142849.48it/s] INFO:pygenometracks.tracksClass:initialize 8. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 10. [x-axis] INFO:pygenometracks.tracksClass:initialize 11. [spacer] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.5007359981536865 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 27.127659574468087. Dpi is set to 130 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 12.58,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (12.58, -0.08) INFO:pygenometracks.tracksClass:plotting 4. [spacer] INFO:pygenometracks.tracksClass:plotting 5. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 6. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 10. [x-axis] INFO:pygenometracks.tracksClass:plotting 11. [spacer]
Plot generated successfully.
In [22]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Pgbd5 Gene'
# Load the image
img = mpimg.imread('output_Pgbd5_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
In [23]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 5
title = genes (gtf) with arrow
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
[spacer]
height = 1
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = 1
title = Chromosomal Location
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=155, color=color)
# Add the converted peaks track above the MLTC files
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", strand=True, labels=False, color="black"
)
# Add output_bw coverage tracks with specific max values
bw_max_value = 15000 # Specific maximum value for both input and IP bam coverage tracks
for bw_file in ["IP2_MLTC-1_5.bw", "Input3_MLTC-1_6.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, max_value=bw_max_value, min_value=0)
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Pgbd5.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [24]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene based on the given bed file
region = "chr8:124368294-124371491"
# Generate the pyGenomeTracks plot
subprocess.run([
"pyGenomeTracks",
"--tracks", config_file_path,
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "130",
"-o", "output_Pgbd5_colored_peaks.png"
])
print("Plot generated successfully.")
WARNING:pygenometracks.tracksClass:In section 7. [Reproducible eCLIP peaks], these parameters are unused:['strand']. INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [genes arrow] 100%|██████████| 14/14 [00:00<00:00, 7678.86it/s] INFO:pygenometracks.tracksClass:initialize 4. [spacer] INFO:pygenometracks.tracksClass:initialize 5. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 6. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 47/47 [00:00<00:00, 69584.29it/s] INFO:pygenometracks.tracksClass:initialize 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 10. [x-axis] INFO:pygenometracks.tracksClass:initialize 11. [spacer] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.4990837574005127 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 27.127659574468087. Dpi is set to 130 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 7.9799999999999995,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (7.9799999999999995, -0.08) INFO:pygenometracks.tracksClass:plotting 4. [spacer] INFO:pygenometracks.tracksClass:plotting 5. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 6. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*Warning* No intervals were found for file /mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP/files/ZPF36L2.reproducible_eclip_peaks.bed in section '7. [Reproducible eCLIP peaks]' for the interval plotted (chr8:124368294-124371491). DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 10. [x-axis] INFO:pygenometracks.tracksClass:plotting 11. [spacer]
Plot generated successfully.
In [25]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Pgbd5 3\' UTR Region Condenced'
# Load the image
img = mpimg.imread('output_Pgbd5_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
In [26]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene based on the given bed file
region = "chr8:124368294-124385000"
# Generate the pyGenomeTracks plot
subprocess.run([
"pyGenomeTracks",
"--tracks", config_file_path,
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "130",
"-o", "output_Pgbd5_colored_peaks.png"
])
print("Plot generated successfully.")
WARNING:pygenometracks.tracksClass:In section 7. [Reproducible eCLIP peaks], these parameters are unused:['strand']. INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [genes arrow] 100%|██████████| 14/14 [00:00<00:00, 7750.83it/s] INFO:pygenometracks.tracksClass:initialize 4. [spacer] INFO:pygenometracks.tracksClass:initialize 5. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 6. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 47/47 [00:00<00:00, 69806.05it/s] INFO:pygenometracks.tracksClass:initialize 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 10. [x-axis] INFO:pygenometracks.tracksClass:initialize 11. [spacer] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.4934067726135254 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 27.127659574468087. Dpi is set to 130 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 12.58,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (12.58, -0.08) INFO:pygenometracks.tracksClass:plotting 4. [spacer] INFO:pygenometracks.tracksClass:plotting 5. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 6. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*Warning* No intervals were found for file /mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP/files/ZPF36L2.reproducible_eclip_peaks.bed in section '7. [Reproducible eCLIP peaks]' for the interval plotted (chr8:124368294-124385000). DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 10. [x-axis] INFO:pygenometracks.tracksClass:plotting 11. [spacer]
Plot generated successfully.
In [27]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Pgbd5 3\' UTR Region'
# Load the image
img = mpimg.imread('output_Pgbd5_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
In [28]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 5
title = genes (gtf) with arrow
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
[spacer]
height = 1
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = 1
title = Chromosomal Location
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=155, color=color)
# Add the converted peaks track above the MLTC files
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", strand=True, labels=False, color="black"
)
# Add output_bw coverage tracks with specific max values
bw_max_value = 55000 # Specific maximum value for both input and IP bam coverage tracks
for bw_file in ["IP2_MLTC-1_5.bw", "Input3_MLTC-1_6.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, max_value=bw_max_value, min_value=0)
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Dst.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [29]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 5
title = genes (gtf) with arrow
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
[spacer]
height = 1
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = 1
title = Chromosomal Location
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=545, color=color)
# Add the converted peaks track above the MLTC files
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", strand=True, labels=False, color="black"
)
# Add output_bw coverage tracks with specific max values
bw_max_value = 6000 # Specific maximum value for both input and IP bam coverage tracks
for bw_file in ["IP2_MLTC-1_5.bw", "Input3_MLTC-1_6.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, max_value=bw_max_value, min_value=0)
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Dst.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [30]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene based on the given bed file
region = "chr1:34305572-34309512"
# Generate the pyGenomeTracks plot
subprocess.run([
"pyGenomeTracks",
"--tracks", config_file_path,
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "130",
"-o", "output_Dst_colored_peaks.png"
])
print("Plot generated successfully.")
WARNING:pygenometracks.tracksClass:In section 7. [Reproducible eCLIP peaks], these parameters are unused:['strand']. INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [genes arrow] 100%|██████████| 14/14 [00:00<00:00, 2838.65it/s] INFO:pygenometracks.tracksClass:initialize 4. [spacer] INFO:pygenometracks.tracksClass:initialize 5. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 6. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 7/7 [00:00<00:00, 40000.17it/s] INFO:pygenometracks.tracksClass:initialize 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 10. [x-axis] INFO:pygenometracks.tracksClass:initialize 11. [spacer] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.513216495513916 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 27.127659574468087. Dpi is set to 130 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 14.879999999999999,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (14.879999999999999, -0.08) INFO:pygenometracks.tracksClass:plotting 4. [spacer] INFO:pygenometracks.tracksClass:plotting 5. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 6. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*Warning* No intervals were found for file /mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP/files/ZPF36L2.reproducible_eclip_peaks.bed in section '7. [Reproducible eCLIP peaks]' for the interval plotted (chr1:34305572-34309512). DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 10. [x-axis] INFO:pygenometracks.tracksClass:plotting 11. [spacer]
Plot generated successfully.
In [31]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Dst 3\' UTR Region'
# Load the image
img = mpimg.imread('output_Dst_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
In [32]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 5
title = genes (gtf) with arrow
file_type = gtf
style = UCSC
display = interleaved
arrow_interval = 10
fontsize = 10
color = green
[spacer]
height = 1
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = 1
title = Chromosomal Location
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with colors
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=15000, color=color)
# Add genes section
ini_content += genes_section
# Add the converted peaks track above the MLTC files
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", strand=True, labels=False, color="black"
)
# Add IP2*.bw and Input3*.bw coverage tracks
bw_max_value = 17500 # Specific maximum value for both input and IP bam coverage tracks
for bw_file in ["IP2_MLTC-1_5.bw", "Input3_MLTC-1_6.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, max_value=bw_max_value, min_value=0)
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Fstl1.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [33]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Fstl1 based on the given specification
region = "chr16:37833687-37836958"
# Generate the pyGenomeTracks plot in SVG format
subprocess.run([
"pyGenomeTracks",
"--tracks", config_file_path,
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "130",
"-o", "output_Fstl1_colored_peaks.png" # Change the output filename and extension to .svg
])
print("Plot generated successfully in SVG format.")
WARNING:pygenometracks.tracksClass:In section 7. [Reproducible eCLIP peaks], these parameters are unused:['strand']. INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 5. [genes arrow] 100%|██████████| 12/12 [00:00<00:00, 9727.80it/s] INFO:pygenometracks.tracksClass:initialize 6. [spacer] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 8/8 [00:00<00:00, 42799.02it/s] INFO:pygenometracks.tracksClass:initialize 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 10. [x-axis] INFO:pygenometracks.tracksClass:initialize 11. [spacer] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.4959232807159424 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 27.127659574468087. Dpi is set to 130 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 5. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 2.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (2.08, -0.08) INFO:pygenometracks.tracksClass:plotting 6. [spacer] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 10. [x-axis] INFO:pygenometracks.tracksClass:plotting 11. [spacer]
Plot generated successfully in SVG format.
In [34]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Fstl1 Region'
# Load the image
img = mpimg.imread('output_Fstl1_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
In [35]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 5
title = genes (gtf) with arrow
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
display = stacked
[spacer]
height = 1
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = 1
title = Chromosomal Location
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with colors
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=15000, color=color)
# Add genes section with collapsed display
ini_content += genes_section
# Add the ATTTA regions file above the reproducible eCLIP peaks
are_file = beds_dir / "ATTTA.bed"
ini_content += generate_track_section(
are_file, 'ARE Sequences', 'bed',
display="collapsed", style="box", color="purple"
)
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", strand=True, labels=False, color="black"
)
# Add IP2*.bw and Input3*.bw coverage tracks
bw_max_value = 17500 # Specific maximum value for both input and IP bam coverage tracks
for bw_file in ["IP2_MLTC-1_5.bw", "Input3_MLTC-1_6.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, max_value=bw_max_value, min_value=0)
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Fstl1.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [36]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Fstl1 based on the given specification
region = "chr16:37833687-37836958"
# Generate the pyGenomeTracks plot in SVG format
subprocess.run([
"pyGenomeTracks",
"--tracks", config_file_path,
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "130",
"-o", "output_Fstl1_colored_peaks.png" # Change the output filename and extension to .svg
])
print("Plot generated successfully in SVG format.")
Plot generated successfully in SVG format.
Traceback (most recent call last):
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/tracksClass.py", line 784, in check_file_exists
open(file_name_to_check, 'r').close()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: '/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP/files/ATTTA.bed'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/tracksClass.py", line 792, in check_file_exists
open(name_with_tracks_path_to_check, 'r').close()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: '//mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP/files/ATTTA.bed'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/gsgeorge/anaconda3/envs/pygenometracks/bin/pyGenomeTracks", line 10, in <module>
sys.exit(main())
^^^^^^
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/plotTracks.py", line 275, in main
trp = PlotTracks(args.tracks.name, args.width, fig_height=args.height,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/tracksClass.py", line 317, in __init__
self.parse_tracks(tracks_file, plot_regions=plot_regions)
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/tracksClass.py", line 716, in parse_tracks
track_options = self.check_file_exists(track_options,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/tracksClass.py", line 795, in check_file_exists
raise InputError(f"File in section [{track_dict['section_name']}] "
pygenometracks.utilities.InputError: File in section [7. [ARE Sequences]] not found:
/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP/files/ATTTA.bed
In [37]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Fstl1 Region'
# Load the image
img = mpimg.imread('output_Fstl1_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
In [38]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = /mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP/files/Col4a1.gtf
height = 5
title = genes (gtf) with arrow
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
display = stacked
[spacer]
height = 1
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = 1
title = Chromosomal Location
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with colors
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=15000, color=color)
# Add genes section with collapsed display
ini_content += genes_section
# Add the TAAAT regions file above the reproducible eCLIP peaks
are_file = beds_dir / "TAAAT.bed"
ini_content += generate_track_section(
are_file, 'ARE Sequences', 'bed',
display="collapsed", style="box", color="purple"
)
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", strand=True, labels=False, color="black"
)
# Add IP2*.bw and Input3*.bw coverage tracks
bw_max_value = 30000 # Specific maximum value for both input and IP bam coverage tracks
for bw_file in ["IP2_MLTC-1_5.bw", "Input3_MLTC-1_6.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, max_value=bw_max_value, min_value=0)
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Col4a1.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [39]:
import subprocess
# Define the configuration file path
config_file_path = "bed_and_gtf_tracks_with_peaks_Col4a1.ini"
# Define the region for the Col4a1 gene
region = "chr8:11197934-11205446"
# Generate the pyGenomeTracks plot in SVG format
subprocess.run([
"pyGenomeTracks",
"--tracks", config_file_path,
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "130",
"-o", "output_Col4a1_colored_peaks.svg"
])
print("Plot generated successfully in SVG format.")
Plot generated successfully in SVG format.
Traceback (most recent call last):
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/tracksClass.py", line 784, in check_file_exists
open(file_name_to_check, 'r').close()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: '/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP/files/TAAAT.bed'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/tracksClass.py", line 792, in check_file_exists
open(name_with_tracks_path_to_check, 'r').close()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: '//mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP/files/TAAAT.bed'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/gsgeorge/anaconda3/envs/pygenometracks/bin/pyGenomeTracks", line 10, in <module>
sys.exit(main())
^^^^^^
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/plotTracks.py", line 275, in main
trp = PlotTracks(args.tracks.name, args.width, fig_height=args.height,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/tracksClass.py", line 317, in __init__
self.parse_tracks(tracks_file, plot_regions=plot_regions)
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/tracksClass.py", line 716, in parse_tracks
track_options = self.check_file_exists(track_options,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/tracksClass.py", line 795, in check_file_exists
raise InputError(f"File in section [{track_dict['section_name']}] "
pygenometracks.utilities.InputError: File in section [7. [ARE Sequences]] not found:
/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP/files/TAAAT.bed
In [40]:
# %%
from IPython.display import display, SVG
# Define the custom title
custom_title = 'Coverage Plot for Col4a1 Region'
# Display the SVG plot
display(SVG(filename='output_Col4a1_colored_peaks.svg'))
In [41]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Col4a1 based on the given specification
region = "chr8:11197934-11205446"
# Generate the pyGenomeTracks plot in PNG format
subprocess.run([
"pyGenomeTracks",
"--tracks", config_file_path,
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "130",
"-o", "output_Col4a1_colored_peaks.png" # Change the output filename and extension to .svg
])
print("Plot generated successfully in SVG format.")
Plot generated successfully in SVG format.
Traceback (most recent call last):
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/tracksClass.py", line 784, in check_file_exists
open(file_name_to_check, 'r').close()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: '/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP/files/TAAAT.bed'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/tracksClass.py", line 792, in check_file_exists
open(name_with_tracks_path_to_check, 'r').close()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: '//mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP/files/TAAAT.bed'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/gsgeorge/anaconda3/envs/pygenometracks/bin/pyGenomeTracks", line 10, in <module>
sys.exit(main())
^^^^^^
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/plotTracks.py", line 275, in main
trp = PlotTracks(args.tracks.name, args.width, fig_height=args.height,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/tracksClass.py", line 317, in __init__
self.parse_tracks(tracks_file, plot_regions=plot_regions)
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/tracksClass.py", line 716, in parse_tracks
track_options = self.check_file_exists(track_options,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/gsgeorge/anaconda3/envs/pygenometracks/lib/python3.12/site-packages/pygenometracks/tracksClass.py", line 795, in check_file_exists
raise InputError(f"File in section [{track_dict['section_name']}] "
pygenometracks.utilities.InputError: File in section [7. [ARE Sequences]] not found:
/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5/ECLIPSE_BIO_SFTP/files/TAAAT.bed
In [42]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Col4a1 Region'
# Load the image
img = mpimg.imread('output_Col4a1_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
NOW I AM GOING TO EXTEND THE REGIONS IN THE BED AND SEE IF THIS HELPS MY COVERAGE
In [43]:
# Define a function to extend BED intervals
def extend_bed_intervals(input_file, output_file, extend_length=150):
with open(input_file, 'r') as infile, open(output_file, 'w') as outfile:
for line in infile:
if line.strip():
# Split the line into columns
cols = line.strip().split('\t')
# Extract start and end positions
chr_name = cols[0]
start = int(cols[1]) - extend_length
end = int(cols[2]) + extend_length
# Ensure start is non-negative
start = max(0, start)
# Construct extended BED entry
extended_cols = [chr_name, str(start), str(end)] + cols[3:]
# Write the extended entry to the new file
outfile.write('\t'.join(extended_cols) + '\n')
# Set input and output BED file paths
input_bed_file = 'ECLIPSE_BIO_SFTP/files/ZPF36L2.reproducible_eclip_peaks.bed'
output_bed_file = 'ECLIPSE_BIO_SFTP/files/ZPF36L2.reproducible_eclip_peaks_extended_150.bed'
# Run the function to extend intervals and create the new BED file
extend_bed_intervals(input_bed_file, output_bed_file)
In [44]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 5
title = genes (gtf) with arrow
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
display = stacked
[spacer]
height = 1
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = 1
title = Chromosomal Location
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with colors
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=15000, color=color)
# Add genes section with collapsed display
ini_content += genes_section
# Add the ATTTA regions file above the reproducible eCLIP peaks
are_file = beds_dir / "ATTTA_Fstl1.bed"
ini_content += generate_track_section(
are_file, 'ATTTA Sequence Motifs', 'bed',
display="collapsed", style="box", color="purple"
)
# # Add the ATTTA regions file above the reproducible eCLIP peaks
# are_file = beds_dir / "TAAAT_Fstl1.bed"
# ini_content += generate_track_section(
# are_file, 'TAAAT Sequence Motifs', 'bed',
# display="collapsed", style="box", color="purple"
# )
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks_extended_150.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", strand=True, labels=False, color="black"
)
# Add IP2*.bw and Input3*.bw coverage tracks
bw_max_value = 17500 # Specific maximum value for both input and IP bam coverage tracks
for bw_file in ["IP2_MLTC-1_5.bw", "Input3_MLTC-1_6.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, max_value=bw_max_value, min_value=0)
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Fstl1.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [45]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Fstl1 based on the given specification
region = "chr16:37833687-37836958"
# Generate the pyGenomeTracks plot in SVG format
subprocess.run([
"pyGenomeTracks",
"--tracks", config_file_path,
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "130",
"-o", "output_Fstl1_colored_peaks.png" # Change the output filename and extension to .svg
])
print("Plot generated successfully in SVG format.")
WARNING:pygenometracks.tracksClass:In section 8. [Reproducible eCLIP peaks], these parameters are unused:['strand']. INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 5. [genes arrow] 100%|██████████| 12/12 [00:00<00:00, 9337.97it/s] INFO:pygenometracks.tracksClass:initialize 6. [spacer] INFO:pygenometracks.tracksClass:initialize 7. [ATTTA Sequence Motifs] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'box' for section 7. [ATTTA Sequence Motifs] is not valid. style has been set to flybase. 100%|██████████| 87/87 [00:00<00:00, 73747.87it/s] INFO:pygenometracks.tracksClass:initialize 8. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 8. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 8/8 [00:00<00:00, 47127.01it/s] INFO:pygenometracks.tracksClass:initialize 9. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 10. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 11. [x-axis] INFO:pygenometracks.tracksClass:initialize 12. [spacer] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.4943561553955078 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 30.319148936170215. Dpi is set to 130 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 5. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 5.68,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (5.68, -0.08) INFO:pygenometracks.tracksClass:plotting 6. [spacer] INFO:pygenometracks.tracksClass:plotting 7. [ATTTA Sequence Motifs] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 9. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 10. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 11. [x-axis] INFO:pygenometracks.tracksClass:plotting 12. [spacer]
Plot generated successfully in SVG format.
In [46]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Fstl1 Region'
# Load the image
img = mpimg.imread('output_Fstl1_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
In [47]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 5
title = genes (gtf) with arrow
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
display = stacked
[spacer]
height = 1
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = 1
title = Chromosomal Location
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with colors
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=15000, color=color)
# Add genes section with collapsed display
ini_content += genes_section
# Add the ATTTA regions file above the reproducible eCLIP peaks
are_file = beds_dir / "ATTTA_Col4a1.bed"
ini_content += generate_track_section(
are_file, 'ATTTA Sequence Motifs', 'bed',
display="collapsed", style="flybase", color="purple"
)
# Add the TAAAT regions file
taa_file = beds_dir / "TAAAT_Col4a1.bed"
ini_content += generate_track_section(
taa_file, 'TAAAT Sequence Motifs', 'bed',
display="collapsed", style="flybase", color="red"
)
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks_extended_50.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="flybase", strand=True, labels=False, color="black"
)
# Add IP2*.bw and Input3*.bw coverage tracks
bw_max_value = 17500 # Specific maximum value for both input and IP bam coverage tracks
for bw_file in ["IP2_MLTC-1_5.bw", "Input3_MLTC-1_6.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, max_value=bw_max_value, min_value=0)
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Col4a1.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [48]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Col4a1 based on the given specification
region = "chr8:11197934-11205446"
# Generate the pyGenomeTracks plot in SVG format
subprocess.run([
"pyGenomeTracks",
"--tracks", config_file_path,
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "130",
"-o", "output_Col4a1_colored_peaks.png" # Adjust output filename
])
print("Plot generated successfully in SVG format.")
WARNING:pygenometracks.tracksClass:In section 9. [Reproducible eCLIP peaks], these parameters are unused:['strand']. INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 5. [genes arrow] 100%|██████████| 12/12 [00:00<00:00, 4698.62it/s] INFO:pygenometracks.tracksClass:initialize 6. [spacer] INFO:pygenometracks.tracksClass:initialize 7. [ATTTA Sequence Motifs] 100%|██████████| 131/131 [00:00<00:00, 158801.68it/s] INFO:pygenometracks.tracksClass:initialize 8. [TAAAT Sequence Motifs] 100%|██████████| 110/110 [00:00<00:00, 75721.88it/s] INFO:pygenometracks.tracksClass:initialize 9. [Reproducible eCLIP peaks] 100%|██████████| 13/13 [00:00<00:00, 77014.06it/s] INFO:pygenometracks.tracksClass:initialize 10. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 11. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 12. [x-axis] INFO:pygenometracks.tracksClass:initialize 13. [spacer] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.5163388252258301 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 33.51063829787234. Dpi is set to 130 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 5. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 7.9799999999999995,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (7.9799999999999995, -0.08) INFO:pygenometracks.tracksClass:plotting 6. [spacer] INFO:pygenometracks.tracksClass:plotting 7. [ATTTA Sequence Motifs] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [TAAAT Sequence Motifs] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 9. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 10. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 11. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 12. [x-axis] INFO:pygenometracks.tracksClass:plotting 13. [spacer]
Plot generated successfully in SVG format.
In [49]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Col4a1 Region'
# Load the image
img = mpimg.imread('output_Col4a1_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
Final pygenometrack
In [50]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 1
title = Fstl1 Gene 3' UTR
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
display = collapsed
labels=false
[spacer]
height = .5
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = .5
title = Chromosomal coordinates
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with colors
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=15000, color=color)
# Add genes section with collapsed display
ini_content += genes_section
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks_extended_150.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", labels=False, color="black", height=1
)
# Add IP2*.bw and Input3*.bw coverage tracks
bw_max_value = 17500 # Specific maximum value for both input and IP bam coverage tracks
for bw_file in ["IP2_MLTC-1_5.bw", "Input3_MLTC-1_6.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, max_value=bw_max_value, min_value=0)
# Add vhighlight for WTTTW regions
ini_content += f"""
[WTTTW Highlights]
file = {beds_dir / 'WTTTW_Fstl1.bed'}
type = vhighlight
color = yellow
alpha = 0.7
"""
# Add vhighlight for ATTTA sequences in green
ini_content += f"""
[ATTTA Highlights]
file = {beds_dir / 'ATTTA_Fstl1.bed'}
type = vhighlight
color = green
alpha = 0.7
"""
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Fstl1.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [51]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Fstl1 based on the given specification
region = "chr16:37833687-37836958"
# Generate the pyGenomeTracks plot in PNG format
subprocess.run([
"pyGenomeTracks",
"--tracks", "bed_and_gtf_tracks_with_peaks_Fstl1.ini",
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "600",
"-o", "output_Fstl1_colored_peaks.png"
])
print("Plot generated successfully in PNG format.")
INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 5. [genes arrow] 100%|██████████| 12/12 [00:00<00:00, 9596.12it/s] INFO:pygenometracks.tracksClass:initialize 6. [spacer] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 8/8 [00:00<00:00, 72160.07it/s] INFO:pygenometracks.tracksClass:initialize 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 12. [x-axis] INFO:pygenometracks.tracksClass:initialize 13. [spacer] INFO:pygenometracks.tracksClass:initialize 10. [WTTTW Highlights] 100%|██████████| 44/44 [00:00<00:00, 160060.17it/s] INFO:pygenometracks.tracksClass:initialize 11. [ATTTA Highlights] 100%|██████████| 4/4 [00:00<00:00, 53261.00it/s] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.529362678527832 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 19.680851063829788. Dpi is set to 600 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 5. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 6. [spacer] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 12. [x-axis] INFO:pygenometracks.tracksClass:plotting 13. [spacer]
Plot generated successfully in PNG format.
In [52]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Fstl1 Region'
# Load the image
img = mpimg.imread('output_Fstl1_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
In [53]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 1
title = Serpinh1 gene
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
display = collapsed
[spacer]
height = .5
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = .5
title = Chromosomal coordinates
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with colors
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=15000, color=color)
# Add genes section with collapsed display
ini_content += genes_section
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", labels=False, color="black", height=1
)
# Add IP2*.bw and Input3*.bw coverage tracks
bw_max_value = 17500 # Specific maximum value for both input and IP bam coverage tracks
for bw_file in ["IP2_MLTC-1_5.bw", "Input3_MLTC-1_6.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, max_value=bw_max_value, min_value=0)
# Add vhighlight for WTTTW regions
ini_content += f"""
[WTTTW Highlights]
file = {beds_dir / 'WTTTW_Serpinh1.bed'}
type = vhighlight
color = green
alpha = 0.7
"""
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Serpinh1.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [54]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Serpinh1 based on the given specification
region = "chr7:99345245-99346200"
# Generate the pyGenomeTracks plot in PNG format
subprocess.run([
"pyGenomeTracks",
"--tracks", "bed_and_gtf_tracks_with_peaks_Serpinh1.ini",
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "600",
"-o", "output_Serpinh1_colored_peaks.png"
])
print("Plot generated successfully in PNG format.")
INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 5. [genes arrow] 100%|██████████| 25/25 [00:00<00:00, 8819.72it/s] INFO:pygenometracks.tracksClass:initialize 6. [spacer] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 4/4 [00:00<00:00, 32201.95it/s] INFO:pygenometracks.tracksClass:initialize 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 11. [x-axis] INFO:pygenometracks.tracksClass:initialize 12. [spacer] INFO:pygenometracks.tracksClass:initialize 10. [WTTTW Highlights] 100%|██████████| 21/21 [00:00<00:00, 72019.94it/s] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.5272266864776611 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 19.680851063829788. Dpi is set to 600 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 5. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 6. [spacer] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 11. [x-axis] INFO:pygenometracks.tracksClass:plotting 12. [spacer]
Plot generated successfully in PNG format.
In [55]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Serpinh1 Region'
# Load the image
img = mpimg.imread('output_Serpinh1_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
Final Serpina1
In [56]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 1
title = Serpinh1 gene 3' UTR
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
display = collapsed
labels=false
[spacer]
height = .5
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = .5
title = Chromosomal coordinates
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
# Add additional arguments.
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with colors
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=30000, color=color)
# Add genes section with collapsed display
ini_content += genes_section
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", labels=False, color="black", height=1
)
# Add IP2*.bw and Input3*.bw coverage tracks
bw_max_value = 17500 # Specific maximum value for both input and IP bam coverage tracks
for bw_file in ["IP2_MLTC-1_5.bw", "Input3_MLTC-1_6.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, max_value=bw_max_value, min_value=0)
# Add vhighlight for WTTTW regions in yellow
ini_content += f"""
[WTTTW Highlights]
file = {beds_dir / 'WTTTW_Serpinh1.bed'}
type = vhighlight
color = yellow
alpha = 0.3
"""
# Add vhighlight for ATTTA sequences in green
ini_content += f"""
[ATTTA Highlights]
file = {beds_dir / 'ATTTA_Serpinh1.bed'}
type = vhighlight
color = green
alpha = 0.7
"""
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Serpinh1.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [57]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Serpinh1 based on the given specification
region = "chr7:99345245-99346200"
# Generate the pyGenomeTracks plot in PNG format
subprocess.run([
"pyGenomeTracks",
"--tracks", "bed_and_gtf_tracks_with_peaks_Serpinh1.ini",
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "600",
"-o", "output_Serpinh1_colored_peaks.png"
])
print("Plot generated successfully in PNG format.")
INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 5. [genes arrow] 100%|██████████| 25/25 [00:00<00:00, 8500.13it/s] INFO:pygenometracks.tracksClass:initialize 6. [spacer] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 4/4 [00:00<00:00, 34309.24it/s] INFO:pygenometracks.tracksClass:initialize 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 12. [x-axis] INFO:pygenometracks.tracksClass:initialize 13. [spacer] INFO:pygenometracks.tracksClass:initialize 10. [WTTTW Highlights] 100%|██████████| 21/21 [00:00<00:00, 67032.26it/s] INFO:pygenometracks.tracksClass:initialize 11. [ATTTA Highlights] 100%|██████████| 2/2 [00:00<00:00, 49636.73it/s] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.5421643257141113 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 19.680851063829788. Dpi is set to 600 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 5. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 6. [spacer] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 12. [x-axis] INFO:pygenometracks.tracksClass:plotting 13. [spacer]
Plot generated successfully in PNG format.
In [58]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Serpinh1 Region'
# Load the image
img = mpimg.imread('output_Serpinh1_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
Final Col4a1
In [59]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = Col4a1.gtf
height = 1
title = Col4a1 gene 3' UTR
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
display = collapsed
[spacer]
height = .5
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = .5
title = Chromosomal coordinates
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
# Add additional arguments.
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with colors
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=15000, color=color)
# Add genes section with collapsed display
ini_content += genes_section
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks_extended_150.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", labels=False, color="black", height=1
)
# Add IP2*.bw and Input3*.bw coverage tracks
bw_max_value = 17500 # Specific maximum value for both input and IP bam coverage tracks
for bw_file in ["IP2_MLTC-1_5.bw", "Input3_MLTC-1_6.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, max_value=bw_max_value, min_value=0)
# Add vhighlight for WTTTW regions in yellow
ini_content += f"""
[WTTTW Highlights]
file = {beds_dir / 'WTTTW_Col4a1.bed'}
type = vhighlight
color = yellow
alpha = 0.3
"""
# Add vhighlight for ATTTA sequences in green
ini_content += f"""
[ATTTA Highlights]
file = {beds_dir / 'ATTTA_Col4a1.bed'}
type = vhighlight
color = green
alpha = 0.7
"""
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Col4a1.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully for Col4a1.")
Configuration file generated successfully for Col4a1.
In [60]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Col4a1 based on the given specification
region = "chr8:11197934-11200000"
# Generate the pyGenomeTracks plot in PNG format
subprocess.run([
"pyGenomeTracks",
"--tracks", "bed_and_gtf_tracks_with_peaks_Col4a1.ini",
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "600",
"-o", "output_Col4a1_colored_peaks.png"
])
print("Plot generated successfully in PNG format for Col4a1.")
INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 5. [genes arrow] 100%|██████████| 8/8 [00:00<00:00, 3430.92it/s] INFO:pygenometracks.tracksClass:initialize 6. [spacer] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 13/13 [00:00<00:00, 51634.42it/s] INFO:pygenometracks.tracksClass:initialize 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 12. [x-axis] INFO:pygenometracks.tracksClass:initialize 13. [spacer] INFO:pygenometracks.tracksClass:initialize 10. [WTTTW Highlights] 100%|██████████| 10/10 [00:00<00:00, 106725.29it/s] INFO:pygenometracks.tracksClass:initialize 11. [ATTTA Highlights] 100%|██████████| 1/1 [00:00<00:00, 20763.88it/s] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.03635907173156738 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 19.680851063829788. Dpi is set to 600 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 5. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 6. [spacer] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 12. [x-axis] INFO:pygenometracks.tracksClass:plotting 13. [spacer]
Plot generated successfully in PNG format for Col4a1.
In [61]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Col4a1 Region'
# Load the image
img = mpimg.imread('output_Col4a1_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
Final Fstl1
In [62]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 1
title = Fstl1 gene 3' UTR
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
display = collapsed
labels=false
[spacer]
height = .5
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = .5
title = Chromosomal coordinates
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
# Add additional arguments.
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with colors
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=15000, color=color)
# Add genes section with collapsed display
ini_content += genes_section
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks_extended_150.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", labels=False, color="black", height=1
)
# Add IP2*.bw and Input3*.bw coverage tracks
bw_max_value = 17500 # Specific maximum value for both input and IP bam coverage tracks
for bw_file in ["IP2_MLTC-1_5.bw", "Input3_MLTC-1_6.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, max_value=bw_max_value, min_value=0)
# Add vhighlight for WTTTW regions in yellow
ini_content += f"""
[WTTTW Highlights]
file = {beds_dir / 'WTTTW_Fstl1.bed'}
type = vhighlight
color = yellow
alpha = 0.3
"""
# Add vhighlight for ATTTA sequences in green
ini_content += f"""
[ATTTA Highlights]
file = {beds_dir / 'ATTTA_Fstl1.bed'}
type = vhighlight
color = green
alpha = 0.7
"""
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Fstl1.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully for Fstl1.")
Configuration file generated successfully for Fstl1.
In [63]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Fstl1 based on the given specification
region = "chr16:37833687-37836958"
# Generate the pyGenomeTracks plot in PNG format
subprocess.run([
"pyGenomeTracks",
"--tracks", "bed_and_gtf_tracks_with_peaks_Fstl1.ini",
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "600",
"-o", "output_Fstl1_colored_peaks.png"
])
print("Plot generated successfully in PNG format for Fstl1.")
INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 5. [genes arrow] 100%|██████████| 12/12 [00:00<00:00, 9469.74it/s] INFO:pygenometracks.tracksClass:initialize 6. [spacer] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 8/8 [00:00<00:00, 62836.01it/s] INFO:pygenometracks.tracksClass:initialize 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 12. [x-axis] INFO:pygenometracks.tracksClass:initialize 13. [spacer] INFO:pygenometracks.tracksClass:initialize 10. [WTTTW Highlights] 100%|██████████| 44/44 [00:00<00:00, 146467.76it/s] INFO:pygenometracks.tracksClass:initialize 11. [ATTTA Highlights] 100%|██████████| 4/4 [00:00<00:00, 37117.73it/s] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.5117721557617188 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 19.680851063829788. Dpi is set to 600 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 5. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 6. [spacer] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 12. [x-axis] INFO:pygenometracks.tracksClass:plotting 13. [spacer]
Plot generated successfully in PNG format for Fstl1.
In [64]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Fstl1 Region'
# Load the image
img = mpimg.imread('output_Fstl1_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
Final Lpl
In [65]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 1
title = Lpl gene 3' UTR
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
display = collapsed
labels=false
[spacer]
height = .5
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = .5
title = Chromosomal coordinates
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
# Add additional arguments.
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with colors
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=15000, color=color)
# Add genes section with collapsed display
ini_content += genes_section
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks_extended_100.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", labels=False, color="black", height=1
)
# Add IP2*.bw and Input3*.bw coverage tracks
bw_max_value = 20000 # Specific maximum value for both input and IP bam coverage tracks
for bw_file in ["IP2_MLTC-1_5.bw", "Input3_MLTC-1_6.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, max_value=bw_max_value, min_value=0)
# Add vhighlight for WTTTW regions in yellow
ini_content += f"""
[WTTTW Highlights]
file = {beds_dir / 'WTTTW_Lpl.bed'}
type = vhighlight
color = yellow
alpha = 0.3
"""
# Add vhighlight for ATTTA sequences in green
ini_content += f"""
[ATTTA Highlights]
file = {beds_dir / 'ATTTA_Lpl.bed'}
type = vhighlight
color = green
alpha = 0.7
"""
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Lpl.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully for Lpl.")
Configuration file generated successfully for Lpl.
In [66]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Lpl based on the given specification
region = "chr8:68904334-68907765"
# Generate the pyGenomeTracks plot in PNG format
subprocess.run([
"pyGenomeTracks",
"--tracks", "bed_and_gtf_tracks_with_peaks_Lpl.ini",
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "600",
"-o", "output_Lpl_colored_peaks.png"
])
print("Plot generated successfully in PNG format for Lpl.")
INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 5. [genes arrow] 100%|██████████| 14/14 [00:00<00:00, 7401.09it/s] INFO:pygenometracks.tracksClass:initialize 6. [spacer] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 4/4 [00:00<00:00, 50994.58it/s] INFO:pygenometracks.tracksClass:initialize 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 12. [x-axis] INFO:pygenometracks.tracksClass:initialize 13. [spacer] INFO:pygenometracks.tracksClass:initialize 10. [WTTTW Highlights] 100%|██████████| 56/56 [00:00<00:00, 75890.48it/s] INFO:pygenometracks.tracksClass:initialize 11. [ATTTA Highlights] 100%|██████████| 9/9 [00:00<00:00, 67893.41it/s] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.5426626205444336 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 19.680851063829788. Dpi is set to 600 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 5. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 6. [spacer] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 12. [x-axis] INFO:pygenometracks.tracksClass:plotting 13. [spacer]
Plot generated successfully in PNG format for Lpl.
In [67]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Lpl Region'
# Load the image
img = mpimg.imread('output_Lpl_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
In [68]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 1
title = Lox gene
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
display = collapsed
[spacer]
height = .5
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = .5
title = Chromosomal coordinates
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
# Add additional arguments.
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with colors
files_to_plot = {
"BM_KO.bw": "orange",
"BM_WT.bw": "blue",
"IP2_MLTC-1_5.bw": "orange",
"Input3_MLTC-1_6.bw": "blue",
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file in ["BM_KO.bw", "BM_WT.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=15000, color=color)
# Add genes section with collapsed display
ini_content += genes_section
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", labels=False, color="black", height=1
)
# Add IP2*.bw and Input3*.bw coverage tracks
bw_max_value = 17500 # Specific maximum value for both input and IP coverage tracks
for bw_file in ["IP2_MLTC-1_5.bw", "Input3_MLTC-1_6.bw"]:
file_path = output_bw_dir / bw_file
color = files_to_plot[bw_file]
title = f"{bw_file} coverage"
ini_content += generate_track_section(file_path, title, 'bigwig', color=color, max_value=bw_max_value, min_value=0)
# Add vhighlight for WTTTW regions in yellow
ini_content += f"""
[WTTTW Highlights]
file = {beds_dir / 'WTTTW_Lox.bed'}
type = vhighlight
color = yellow
alpha = 0.3
"""
# Add vhighlight for ATTTA sequences in green
ini_content += f"""
[ATTTA Highlights]
file = {beds_dir / 'ATTTA_Lox.bed'}
type = vhighlight
color = green
alpha = 0.7
"""
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Lox.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [69]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Lox based on the given specification
region = "chr18:52515956-52519292"
# Generate the pyGenomeTracks plot in PNG format
subprocess.run([
"pyGenomeTracks",
"--tracks", "bed_and_gtf_tracks_with_peaks_Lox.ini",
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "600",
"-o", "output_Lox_colored_peaks.png"
])
print("Plot generated successfully in PNG format.")
INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:initialize 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:initialize 5. [genes arrow] 100%|██████████| 13/13 [00:00<00:00, 9229.17it/s] INFO:pygenometracks.tracksClass:initialize 6. [spacer] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 2/2 [00:00<00:00, 22017.34it/s] INFO:pygenometracks.tracksClass:initialize 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:initialize 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:initialize 12. [x-axis] INFO:pygenometracks.tracksClass:initialize 13. [spacer] INFO:pygenometracks.tracksClass:initialize 10. [WTTTW Highlights] 100%|██████████| 72/72 [00:00<00:00, 160803.99it/s] INFO:pygenometracks.tracksClass:initialize 11. [ATTTA Highlights] 100%|██████████| 10/10 [00:00<00:00, 70611.18it/s] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.4968526363372803 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 19.680851063829788. Dpi is set to 600 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [BM_KO.bw coverage] INFO:pygenometracks.tracksClass:plotting 4. [BM_WT.bw coverage] INFO:pygenometracks.tracksClass:plotting 5. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 6. [spacer] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [IP2_MLTC-1_5.bw coverage] INFO:pygenometracks.tracksClass:plotting 9. [Input3_MLTC-1_6.bw coverage] INFO:pygenometracks.tracksClass:plotting 12. [x-axis] INFO:pygenometracks.tracksClass:plotting 13. [spacer]
Plot generated successfully in PNG format.
In [70]:
# %%
from IPython.display import display
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
# Define the custom title
custom_title = 'Coverage Plot for Lox Region'
# Load the image
img = mpimg.imread('output_Lox_colored_peaks.png')
# Plot the image with a custom title
plt.figure(figsize=(11, 8.5))
plt.imshow(img)
plt.title(custom_title, fontsize=12)
plt.axis('off') # Hide axes
plt.show()
FINAL VERSIONS
Serpinh1
In [71]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 1
title = Serpinh1 gene 3' UTR
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
display = collapsed
labels = false
[spacer]
height = .5
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = .5
title = Chromosomal coordinates
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
# Add additional arguments.
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with updated names and colors
files_to_plot = {
"BM_KO.bw": ("KO Condition Coverage", "orange"),
"BM_WT.bw": ("WT Condition Coverage", "blue"),
}
# Generate the .ini content
ini_content = header
# Add the specified coverage tracks
for bw_file, (title, color) in files_to_plot.items():
file_path = output_bw_dir / bw_file
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=30000, color=color)
# Add genes section with collapsed display
ini_content += genes_section
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", labels=False, color="black", height=1
)
# Additional files for IP2 and IgG IP coverage
additional_files_to_plot = {
"IP2_MLTC-1_5.bw": ("Zfp36L2 IP", "red"),
"Input3_MLTC-1_6.bw": ("IgG IP", "purple"),
}
# Add additional coverage tracks
for bw_file, (title, color) in additional_files_to_plot.items():
file_path = output_bw_dir / bw_file
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=15000, color=color)
# Add vhighlight for WTTTW regions in yellow
ini_content += f"""
[WTTTW Highlights]
file = {beds_dir / 'WTTTW_Serpinh1.bed'}
type = vhighlight
color = yellow
alpha = 0.3
"""
# Add vhighlight for ATTTA sequences in green
ini_content += f"""
[ATTTA Highlights]
file = {beds_dir / 'ATTTA_Serpinh1.bed'}
type = vhighlight
color = green
alpha = 0.7
"""
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Serpinh1.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [72]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Serpinh1 based on the given specification
region = "chr7:99345245-99346200"
# Generate the pyGenomeTracks plot in SVG format
subprocess.run([
"pyGenomeTracks",
"--tracks", "bed_and_gtf_tracks_with_peaks_Serpinh1.ini",
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "600",
"-o", "output_Serpinh1_colored_peaks_final.svg" # Save as SVG
])
print("Plot generated successfully in SVG format.")
INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [KO Condition Coverage] INFO:pygenometracks.tracksClass:initialize 4. [WT Condition Coverage] INFO:pygenometracks.tracksClass:initialize 5. [genes arrow] 100%|██████████| 25/25 [00:00<00:00, 9009.93it/s] INFO:pygenometracks.tracksClass:initialize 6. [spacer] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 4/4 [00:00<00:00, 33487.46it/s] INFO:pygenometracks.tracksClass:initialize 8. [Zfp36L2 IP] INFO:pygenometracks.tracksClass:initialize 9. [IgG IP] INFO:pygenometracks.tracksClass:initialize 12. [x-axis] INFO:pygenometracks.tracksClass:initialize 13. [spacer] INFO:pygenometracks.tracksClass:initialize 10. [WTTTW Highlights] 100%|██████████| 21/21 [00:00<00:00, 69245.58it/s] INFO:pygenometracks.tracksClass:initialize 11. [ATTTA Highlights] 100%|██████████| 2/2 [00:00<00:00, 34239.22it/s] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.5120649337768555 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 19.680851063829788. Dpi is set to 600 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [KO Condition Coverage] INFO:pygenometracks.tracksClass:plotting 4. [WT Condition Coverage] INFO:pygenometracks.tracksClass:plotting 5. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 6. [spacer] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [Zfp36L2 IP] INFO:pygenometracks.tracksClass:plotting 9. [IgG IP] INFO:pygenometracks.tracksClass:plotting 12. [x-axis] INFO:pygenometracks.tracksClass:plotting 13. [spacer]
Plot generated successfully in SVG format.
In [73]:
# %%
from IPython.display import SVG, display
# Display the SVG file directly in the notebook
svg_path = 'output_Serpinh1_colored_peaks_final.svg'
# Define the custom title
custom_title = 'Coverage Plot for Serpinh1 Region'
# Display the SVG with a title
display(SVG(filename=svg_path))
print(custom_title)
Coverage Plot for Serpinh1 Region
Col4a1
In [74]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = Col4a1.gtf
height = 1
title = Col4a1 gene 3' UTR
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
display = collapsed
labels = false
[spacer]
height = .5
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = .5
title = Chromosomal coordinates
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
# Add additional arguments.
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with updated names and colors
files_to_plot = {
"BM_KO.bw": ("KO Condition Coverage", "orange"),
"BM_WT.bw": ("WT Condition Coverage", "blue"),
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file, (title, color) in files_to_plot.items():
file_path = output_bw_dir / bw_file
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=6000, color=color)
# Add genes section with collapsed display
ini_content += genes_section
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", labels=False, color="black", height=1
)
# Additional files for IP2 and IgG IP coverage
additional_files_to_plot = {
"IP2_MLTC-1_5.bw": ("Zfp36L2 IP", "red"),
"Input3_MLTC-1_6.bw": ("IgG IP", "purple"),
}
bw_max_value = 25000 # Specific maximum value for both input and IP bam coverage tracks
# Add additional coverage tracks
for bw_file, (title, color) in additional_files_to_plot.items():
file_path = output_bw_dir / bw_file
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=bw_max_value, color=color)
# Add vhighlight for WTTTW regions in yellow
ini_content += f"""
[WTTTW Highlights]
file = {beds_dir / 'WTTTW_Col4a1.bed'}
type = vhighlight
color = yellow
alpha = 0.3
"""
# Add vhighlight for ATTTA sequences in green
ini_content += f"""
[ATTTA Highlights]
file = {beds_dir / 'ATTTA_Col4a1.bed'}
type = vhighlight
color = green
alpha = 0.7
"""
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Col4a1.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully for Col4a1.")
Configuration file generated successfully for Col4a1.
In [75]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Col4a1 based on the given specification
region = "chr8:11197934-11200000"
# Generate the pyGenomeTracks plot in PNG format
subprocess.run([
"pyGenomeTracks",
"--tracks", "bed_and_gtf_tracks_with_peaks_Col4a1.ini",
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "600",
"-o", "output_Col4a1_colored_peaks_final.svg"
])
print("Plot generated successfully in svg format for Col4a1.")
INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [KO Condition Coverage] INFO:pygenometracks.tracksClass:initialize 4. [WT Condition Coverage] INFO:pygenometracks.tracksClass:initialize 5. [genes arrow] 100%|██████████| 8/8 [00:00<00:00, 3484.00it/s] INFO:pygenometracks.tracksClass:initialize 6. [spacer] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 13/13 [00:00<00:00, 111050.82it/s] INFO:pygenometracks.tracksClass:initialize 8. [Zfp36L2 IP] INFO:pygenometracks.tracksClass:initialize 9. [IgG IP] INFO:pygenometracks.tracksClass:initialize 12. [x-axis] INFO:pygenometracks.tracksClass:initialize 13. [spacer] INFO:pygenometracks.tracksClass:initialize 10. [WTTTW Highlights] 100%|██████████| 10/10 [00:00<00:00, 62695.13it/s] INFO:pygenometracks.tracksClass:initialize 11. [ATTTA Highlights] 100%|██████████| 1/1 [00:00<00:00, 21845.33it/s] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.029344558715820312 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 19.680851063829788. Dpi is set to 600 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [KO Condition Coverage] INFO:pygenometracks.tracksClass:plotting 4. [WT Condition Coverage] INFO:pygenometracks.tracksClass:plotting 5. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 6. [spacer] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [Zfp36L2 IP] INFO:pygenometracks.tracksClass:plotting 9. [IgG IP] INFO:pygenometracks.tracksClass:plotting 12. [x-axis] INFO:pygenometracks.tracksClass:plotting 13. [spacer]
Plot generated successfully in svg format for Col4a1.
In [76]:
# %%
from IPython.display import SVG, display
# Display the SVG file directly in the notebook
svg_path = 'output_Col4a1_colored_peaks_final.svg'
# Define the custom title
custom_title = 'Coverage Plot for Col4a1 Region'
# Display the SVG with a title
display(SVG(filename=svg_path))
print(custom_title)
Coverage Plot for Col4a1 Region
Fstl1
In [77]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 1
title = Fstl1 gene 3' UTR
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
display = collapsed
labels = false
[spacer]
height = .5
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = .5
title = Chromosomal coordinates
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
# Add additional arguments.
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with updated names and colors
files_to_plot = {
"BM_KO.bw": ("KO Condition Coverage", "orange"),
"BM_WT.bw": ("WT Condition Coverage", "blue"),
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file, (title, color) in files_to_plot.items():
file_path = output_bw_dir / bw_file
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=12500, color=color)
# Add genes section with collapsed display
ini_content += genes_section
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", labels=False, color="black", height=1
)
# Additional files for IP2 and IgG IP coverage
additional_files_to_plot = {
"IP2_MLTC-1_5.bw": ("Zfp36L2 IP", "red"),
"Input3_MLTC-1_6.bw": ("IgG IP", "purple"),
}
bw_max_value = 15000 # Specific maximum value for both input and IP bam coverage tracks
# Add additional coverage tracks
for bw_file, (title, color) in additional_files_to_plot.items():
file_path = output_bw_dir / bw_file
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=bw_max_value, color=color)
# Add vhighlight for WTTTW regions in yellow
ini_content += f"""
[WTTTW Highlights]
file = {beds_dir / 'WTTTW_Fstl1.bed'}
type = vhighlight
color = yellow
alpha = 0.3
"""
# Add vhighlight for ATTTA sequences in green
ini_content += f"""
[ATTTA Highlights]
file = {beds_dir / 'ATTTA_Fstl1.bed'}
type = vhighlight
color = green
alpha = 0.7
"""
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Fstl1.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully for Fstl1.")
Configuration file generated successfully for Fstl1.
In [78]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Fstl1 based on the given specification
region = "chr16:37833687-37836958"
# Generate the pyGenomeTracks plot in PNG format
subprocess.run([
"pyGenomeTracks",
"--tracks", "bed_and_gtf_tracks_with_peaks_Fstl1.ini",
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "600",
"-o", "output_Fstl1_colored_peaks_final.svg"
])
print("Plot generated successfully in PNG format for Fstl1.")
INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [KO Condition Coverage] INFO:pygenometracks.tracksClass:initialize 4. [WT Condition Coverage] INFO:pygenometracks.tracksClass:initialize 5. [genes arrow] 100%|██████████| 12/12 [00:00<00:00, 9845.78it/s] INFO:pygenometracks.tracksClass:initialize 6. [spacer] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 8/8 [00:00<00:00, 54560.05it/s] INFO:pygenometracks.tracksClass:initialize 8. [Zfp36L2 IP] INFO:pygenometracks.tracksClass:initialize 9. [IgG IP] INFO:pygenometracks.tracksClass:initialize 12. [x-axis] INFO:pygenometracks.tracksClass:initialize 13. [spacer] INFO:pygenometracks.tracksClass:initialize 10. [WTTTW Highlights] 100%|██████████| 44/44 [00:00<00:00, 138758.93it/s] INFO:pygenometracks.tracksClass:initialize 11. [ATTTA Highlights] 100%|██████████| 4/4 [00:00<00:00, 48210.39it/s] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.4955000877380371 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 19.680851063829788. Dpi is set to 600 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [KO Condition Coverage] INFO:pygenometracks.tracksClass:plotting 4. [WT Condition Coverage] INFO:pygenometracks.tracksClass:plotting 5. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 6. [spacer] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [Zfp36L2 IP] INFO:pygenometracks.tracksClass:plotting 9. [IgG IP] INFO:pygenometracks.tracksClass:plotting 12. [x-axis] INFO:pygenometracks.tracksClass:plotting 13. [spacer]
Plot generated successfully in PNG format for Fstl1.
In [79]:
# %%
from IPython.display import SVG, display
# Display the SVG file directly in the notebook
svg_path = 'output_Fstl1_colored_peaks_final.svg'
# Define the custom title
custom_title = 'Coverage Plot for Fstl1 Region'
# Display the SVG with a title
display(SVG(filename=svg_path))
print(custom_title)
Coverage Plot for Fstl1 Region
Lpl
In [4]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 1
title = Lpl gene 3' UTR
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
display = collapsed
labels = false
[spacer]
height = .5
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = .5
title = Chromosomal coordinates
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
# Add additional arguments.
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with updated names and colors
files_to_plot = {
"BM_KO.bw": ("KO Condition Coverage", "orange"),
"BM_WT.bw": ("WT Condition Coverage", "blue"),
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file, (title, color) in files_to_plot.items():
file_path = output_bw_dir / bw_file
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=15000, color=color)
# Add genes section with collapsed display
ini_content += genes_section
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="expanded", style="flybase", labels=False, color="black", height=1
)
# Additional files for IP2 and IgG IP coverage
additional_files_to_plot = {
"IP2_MLTC-1_5.bw": ("Zfp36L2 IP", "red"),
"Input3_MLTC-1_6.bw": ("IgG IP", "purple"),
}
bw_max_value = 20000 # Specific maximum value for both input and IP bam coverage tracks
# Add additional coverage tracks
for bw_file, (title, color) in additional_files_to_plot.items():
file_path = output_bw_dir / bw_file
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=bw_max_value, color=color)
# Add vhighlight for WTTTW regions in yellow
ini_content += f"""
[WTTTW Highlights]
file = {beds_dir / 'WTTTW_Lpl.bed'}
type = vhighlight
color = yellow
alpha = 0.3
"""
# Add vhighlight for ATTTA sequences in green
ini_content += f"""
[ATTTA Highlights]
file = {beds_dir / 'ATTTA_Lpl.bed'}
type = vhighlight
color = green
alpha = 0.7
"""
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Lpl.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully for Lpl.")
Configuration file generated successfully for Lpl.
In [5]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Lpl based on the given specification
region = "chr8:68904334-68907765"
# Generate the pyGenomeTracks plot in PNG format
subprocess.run([
"pyGenomeTracks",
"--tracks", "bed_and_gtf_tracks_with_peaks_Lpl.ini",
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "600",
"-o", "output_Lpl_colored_peaks_final.svg"
])
print("Plot generated successfully in PNG format for Lpl.")
INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [KO Condition Coverage] INFO:pygenometracks.tracksClass:initialize 4. [WT Condition Coverage] INFO:pygenometracks.tracksClass:initialize 5. [genes arrow] 100%|██████████| 14/14 [00:00<00:00, 7310.79it/s] INFO:pygenometracks.tracksClass:initialize 6. [spacer] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* display: 'expanded' for section 7. [Reproducible eCLIP peaks] is not valid. display has been set to stacked. 100%|██████████| 4/4 [00:00<00:00, 33222.21it/s] INFO:pygenometracks.tracksClass:initialize 8. [Zfp36L2 IP] INFO:pygenometracks.tracksClass:initialize 9. [IgG IP] INFO:pygenometracks.tracksClass:initialize 12. [x-axis] INFO:pygenometracks.tracksClass:initialize 13. [spacer] INFO:pygenometracks.tracksClass:initialize 10. [WTTTW Highlights] 100%|██████████| 56/56 [00:00<00:00, 78241.51it/s] INFO:pygenometracks.tracksClass:initialize 11. [ATTTA Highlights] 100%|██████████| 9/9 [00:00<00:00, 60787.01it/s] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.5399680137634277 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 19.680851063829788. Dpi is set to 600 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [KO Condition Coverage] INFO:pygenometracks.tracksClass:plotting 4. [WT Condition Coverage] INFO:pygenometracks.tracksClass:plotting 5. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 6. [spacer] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [Zfp36L2 IP] INFO:pygenometracks.tracksClass:plotting 9. [IgG IP] INFO:pygenometracks.tracksClass:plotting 12. [x-axis] INFO:pygenometracks.tracksClass:plotting 13. [spacer]
Plot generated successfully in PNG format for Lpl.
In [6]:
# %%
from IPython.display import SVG, display
# Display the SVG file directly in the notebook
svg_path = 'output_Lpl_colored_peaks_final.svg'
# Define the custom title
custom_title = 'Coverage Plot for Lpl Region'
# Display the SVG with a title
display(SVG(filename=svg_path))
print(custom_title)
Coverage Plot for Lpl Region
Lox
In [ ]:
# %%
import os
from pathlib import Path
# Set up the directories
base_dir = Path("/mnt/work_1/gsgeorge/mus_musculus/coverage_Pgbd5")
output_bw_dir = base_dir / "ECLIPSE_BIO_SFTP" / "output_bw"
beds_dir = base_dir / "ECLIPSE_BIO_SFTP" / "files"
# Create an .ini content for pyGenomeTracks
header = """
[x-axis]
where = top
fontsize = 12
[spacer]
height = .5
"""
genes_section = """
[genes arrow]
file = gencode.vM25.annotation.gtf
height = 1
title = Lox gene 3' UTR
file_type = gtf
style = UCSC
arrow_interval = 10
fontsize = 10
color = green
display = collapsed
labels = false
[spacer]
height = .5
"""
footer = """
[x-axis]
fontsize = 12
[spacer]
height = .5
title = Chromosomal coordinates
"""
def generate_track_section(file_path, title, file_type, color=None, min_value=None, max_value=None, **kwargs):
section = f"""
[{title}]
file = {file_path}
height = 3
title = {title}
file_type = {file_type}
"""
if color:
section += f"color = {color}\n"
if min_value is not None:
section += f"min_value = {min_value}\n"
if max_value is not None:
section += f"max_value = {max_value}\n"
# Add additional arguments.
for key, value in kwargs.items():
section += f"{key} = {value}\n"
return section
# Files to be visualized with updated names and colors
files_to_plot = {
"BM_KO.bw": ("KO Condition Coverage", "orange"),
"BM_WT.bw": ("WT Condition Coverage", "blue"),
}
# Generate the .ini content
ini_content = header
# Add BM_KO and BM_WT coverage tracks
for bw_file, (title, color) in files_to_plot.items():
file_path = output_bw_dir / bw_file
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=6000, color=color)
# Add genes section with collapsed display
ini_content += genes_section
# Add the converted peaks track
converted_peaks_file = beds_dir / "ZPF36L2.reproducible_eclip_peaks.bed"
ini_content += generate_track_section(
converted_peaks_file, 'Reproducible eCLIP peaks', 'bed',
display="collapsed", style="exon_arrows", labels=False, color="black", height=1
)
# Additional files for IP2 and IgG IP coverage
additional_files_to_plot = {
"IP2_MLTC-1_5.bw": ("Zfp36L2 IP", "red"),
"Input3_MLTC-1_6.bw": ("IgG IP", "purple"),
}
bw_max_value = 9000 # Specific maximum value for both input and IP bam coverage tracks
# Add additional coverage tracks
for bw_file, (title, color) in additional_files_to_plot.items():
file_path = output_bw_dir / bw_file
ini_content += generate_track_section(file_path, title, 'bigwig', min_value=0, max_value=bw_max_value, color=color)
# Add vhighlight for WTTTW regions in yellow
ini_content += f"""
[WTTTW Highlights]
file = {beds_dir / 'WTTTW_Lox.bed'}
type = vhighlight
color = yellow
alpha = 0.3
"""
# Add vhighlight for ATTTA sequences in green
ini_content += f"""
[ATTTA Highlights]
file = {beds_dir / 'ATTTA_Lox.bed'}
type = vhighlight
color = green
alpha = 0.7
"""
ini_content += footer
# Write the configuration content to a file
config_file_path = "bed_and_gtf_tracks_with_peaks_Lox.ini"
with open(config_file_path, "w") as config_file:
config_file.write(ini_content)
print("Configuration file generated successfully.")
Configuration file generated successfully.
In [84]:
# %%
# Import necessary library to run shell commands within the notebook
import subprocess
# Define the region for the gene Lox based on the given specification
region = "chr18:52515956-52519292"
# Generate the pyGenomeTracks plot in PNG format
subprocess.run([
"pyGenomeTracks",
"--tracks", "bed_and_gtf_tracks_with_peaks_Lox.ini",
"--region", region,
"--trackLabelFraction", "0.2",
"--width", "38",
"--dpi", "600",
"-o", "output_Lox_colored_peaks_final.svg"
])
print("Plot generated successfully in svg format.")
INFO:pygenometracks.tracksClass:initialize 1. [x-axis] INFO:pygenometracks.tracksClass:initialize 2. [spacer] INFO:pygenometracks.tracksClass:initialize 3. [KO Condition Coverage] INFO:pygenometracks.tracksClass:initialize 4. [WT Condition Coverage] INFO:pygenometracks.tracksClass:initialize 5. [genes arrow] 100%|██████████| 13/13 [00:00<00:00, 9328.65it/s] INFO:pygenometracks.tracksClass:initialize 6. [spacer] INFO:pygenometracks.tracksClass:initialize 7. [Reproducible eCLIP peaks] WARNING:pygenometracks.tracks.GenomeTrack:*WARNING* style: 'exon_arrows' for section 7. [Reproducible eCLIP peaks] is not valid. style has been set to flybase. 100%|██████████| 2/2 [00:00<00:00, 21675.99it/s] INFO:pygenometracks.tracksClass:initialize 8. [Zfp36L2 IP] INFO:pygenometracks.tracksClass:initialize 9. [IgG IP] INFO:pygenometracks.tracksClass:initialize 12. [x-axis] INFO:pygenometracks.tracksClass:initialize 13. [spacer] INFO:pygenometracks.tracksClass:initialize 10. [WTTTW Highlights] 100%|██████████| 72/72 [00:00<00:00, 79283.25it/s] INFO:pygenometracks.tracksClass:initialize 11. [ATTTA Highlights] 100%|██████████| 10/10 [00:00<00:00, 69557.28it/s] INFO:pygenometracks.tracksClass:time initializing track(s): INFO:pygenometracks.tracksClass:0.514624834060669 DEBUG:pygenometracks.tracksClass:Figure size in cm is 38.0 x 19.680851063829788. Dpi is set to 600 INFO:pygenometracks.tracksClass:plotting 1. [x-axis] INFO:pygenometracks.tracksClass:plotting 2. [spacer] INFO:pygenometracks.tracksClass:plotting 3. [KO Condition Coverage] INFO:pygenometracks.tracksClass:plotting 4. [WT Condition Coverage] INFO:pygenometracks.tracksClass:plotting 5. [genes arrow] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 6. [spacer] INFO:pygenometracks.tracksClass:plotting 7. [Reproducible eCLIP peaks] DEBUG:pygenometracks.tracks.GenomeTrack:ylim 1.08,-0.08 DEBUG:pygenometracks.tracks.GenomeTrack:ylim (1.08, -0.08) INFO:pygenometracks.tracksClass:plotting 8. [Zfp36L2 IP] INFO:pygenometracks.tracksClass:plotting 9. [IgG IP] INFO:pygenometracks.tracksClass:plotting 12. [x-axis] INFO:pygenometracks.tracksClass:plotting 13. [spacer]
Plot generated successfully in svg format.
In [85]:
# %%
from IPython.display import SVG, display
# Display the SVG file directly in the notebook
svg_path = 'output_Lox_colored_peaks_final.svg'
# Define the custom title
custom_title = 'Coverage Plot for Lox Region'
# Display the SVG with a title
display(SVG(filename=svg_path))
print(custom_title)
Coverage Plot for Lox Region